One variable plots

# Density plot of `bmi`
ggplot(data = nhanes_small, mapping = aes(x = bmi)) +
    geom_density()
## Warning: Removed 366 rows containing non-finite values (stat_density).

# Histogram of `bmi` (no bin count given, so stat_bin() falls back to 30 bins)
ggplot(data = nhanes_small, mapping = aes(x = bmi)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 366 rows containing non-finite values (stat_bin).

# Bar chart of the discrete variable `sex`
ggplot(data = nhanes_small, mapping = aes(x = sex)) +
    geom_bar()

# Bar chart of the discrete variable `diabetes`
ggplot(data = nhanes_small, mapping = aes(x = diabetes)) +
    geom_bar()

# Keep only the rows where `diabetes` is not missing
nhanes_tidied <- nhanes_small %>%
    filter(!is.na(diabetes))

# Save the new data frame, overwriting any previously saved copy.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
usethis::use_data(nhanes_tidied, overwrite = TRUE)
## ✓ Setting active project to '/Users/schaarup/Desktop/LearningR'
## ✓ Saving 'nhanes_tidied' to 'data/nhanes_tidied.rda'
## • Document your data (see 'https://r-pkgs.org/data.html')

# Bar chart of `diabetes` on the filtered data
ggplot(nhanes_tidied, aes(x = diabetes)) +
    geom_bar()

Plotting two variables

# Shared base plot: `bmi` on the x-axis, `tot_chol` on the y-axis
bmi_chol <- nhanes_tidied %>%
    ggplot(aes(x = bmi, y = tot_chol))

# Scatter plot
bmi_chol + geom_point()
## Warning: Removed 1457 rows containing missing values (geom_point).

# Hexagonal bins
bmi_chol + geom_hex()
## Warning: Removed 1457 rows containing non-finite values (stat_binhex).

# Smoothed trend line
bmi_chol + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1457 rows containing non-finite values (stat_smooth).

# Hexagonal bins with the smoothed trend line drawn on top
bmi_chol + geom_hex() + geom_smooth()
## Warning: Removed 1457 rows containing non-finite values (stat_binhex).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1457 rows containing non-finite values (stat_smooth).

# Two discrete variables: `diabetes` on the x-axis, bars filled by `sex`
two_discrete <- ggplot(nhanes_tidied, aes(x = diabetes, fill = sex))

# Default bar layout
two_discrete + geom_bar()

# Bars for each `sex` placed side by side instead
two_discrete + geom_bar(position = position_dodge())

# One discrete (`diabetes`) and one continuous (`bmi`) variable
two_mixed <- ggplot(nhanes_tidied, aes(x = diabetes, y = bmi))

# Box plot
two_mixed + geom_boxplot()
## Warning: Removed 229 rows containing non-finite values (stat_boxplot).

# Jittered points
two_mixed + geom_jitter()
## Warning: Removed 229 rows containing missing values (geom_point).

# Violin plot
two_mixed + geom_violin()
## Warning: Removed 229 rows containing non-finite values (stat_ydensity).

Exercise 11.5

# 1a. Distribution of age
# The argument is `bins`; the earlier spelling `bin` was not recognised by
# geom_histogram() and was ignored with a warning.
ggplot(nhanes_tidied, aes(x = age)) +
    geom_histogram(bins = 30)

# 1b. Distribution of age of diabetes diagnosis
ggplot(nhanes_tidied, aes(x = diabetes_age)) +
    geom_histogram(bins = 30)
## Warning: Removed 9229 rows containing non-finite values (stat_bin).

# 2a. Bar chart of `smoke_now`
nhanes_tidied %>%
    ggplot(aes(x = smoke_now)) +
    geom_bar()

# 2b. Bar chart of `phys_active`
nhanes_tidied %>%
    ggplot(aes(x = phys_active)) +
    geom_bar()

# 3a. `bmi` against `bp_sys_ave`, drawn as hexagonal bins
nhanes_tidied %>%
    ggplot(aes(x = bmi, y = bp_sys_ave)) +
    geom_hex()
## Warning: Removed 1376 rows containing non-finite values (stat_binhex).

# 3b. `bmi` against `bp_dia_ave`, drawn as hexagonal bins
nhanes_tidied %>%
    ggplot(aes(x = bmi, y = bp_dia_ave)) +
    geom_hex()
## Warning: Removed 1376 rows containing non-finite values (stat_binhex).

# 4. `diabetes` bars split side by side by `phys_active`
nhanes_tidied %>%
    ggplot(aes(x = diabetes, fill = phys_active)) +
    geom_bar(position = position_dodge())

# 5. Violin plot of `poverty` for each `diabetes` group
nhanes_tidied %>%
    ggplot(aes(x = diabetes, y = poverty)) +
    geom_violin()
## Warning: Removed 710 rows containing non-finite values (stat_ydensity).

Plotting three or more variables

# One continuous and two discrete variables
ggplot(nhanes_tidied, aes(x = sex, y = bp_sys_ave, colour = diabetes)) +
    geom_boxplot()
## Warning: Removed 1312 rows containing non-finite values (stat_boxplot).

# Three continuous variables
ggplot(nhanes_tidied, aes(x = bmi, y = bp_sys_ave, colour = age)) +
    geom_point()
## Warning: Removed 1376 rows containing missing values (geom_point).

# Two continuous variables and one discrete variable
ggplot(nhanes_tidied, aes(x = bmi, y = bp_sys_ave, colour = diabetes)) +
    geom_point()
## Warning: Removed 1376 rows containing missing values (geom_point).

# Side-by-side panels: one column per `diabetes` level
ggplot(nhanes_tidied, aes(x = bmi, y = bp_sys_ave)) +
    geom_point() +
    facet_grid(cols = vars(diabetes))
## Warning: Removed 1376 rows containing missing values (geom_point).

# Stacked panels: one row per `diabetes` level
ggplot(nhanes_tidied, aes(x = bmi, y = bp_sys_ave)) +
    geom_point() +
    facet_grid(rows = vars(diabetes))
## Warning: Removed 1376 rows containing missing values (geom_point).

# Five variables: x, y, colour, facet rows and facet columns
ggplot(nhanes_tidied, aes(x = bmi, y = bp_sys_ave, colour = age)) +
    geom_point() +
    facet_grid(rows = vars(diabetes), cols = vars(sex))
## Warning: Removed 1376 rows containing missing values (geom_point).